#treatment of overall dataset for use in multivariate stats setwd("/Users/emmatimminsschiffman/Desktop/3' targeted illumina sequencing C. gigas/isotig RNA-seq") contig.exp<-read.csv('contig expression for multivariate.csv', header=T, row.names=1) #get rid of rare (expressed in less than half the oysters) and super abundant (expressed in most oysters) proteins contig.exp1<-drop.var(contig.exp, min.fo=8) str(contig.exp1) #4228 genes left out of 10341 contig.exp2<-drop.var(contig.exp, max.po=0.95) str(contig.exp2) #no genes left #will do analyses on entire dataset and dataset minus the rare genes #log(x+1 transformation) contig.exptra<-(contig.exp+1) contig.exptra<-data.trans(contig.exptra, method='log', plot=F) contig.exp1tra<-(contig.exp1+1) contig.exp1tra<-data.trans(contig.exp1tra, method='log', plot=F) #hierarchical agglomerative clustering using bray-curtis dissimilarity coefficient contig.expbray<-vegdist(contig.exptra, method='bray') contig.exp1bray<-vegdist(contig.exp1tra, method='bray') contig.clust<-hclust(contig.expbray, method='average') contig.clust1<-hclust(contig.exp1bray, method='average') plot(contig.clust, main='Average-Linkage Dendrogram, all isotigs') plot(contig.clust1, main='Average-Linkage Dendrogram, no low abundance isotigs') #NMDS contig.nmds<-metaMDS(contig.exptra, distance='bray', k=2, trymax=100) #low stress vec.contig<-envfit(contig.nmds$points, contig.exptra, perm=100) ordiplot(contig.nmds, choices=c(1,2), type='text', display='sites', xlab='Axis 1', ylab='Axis 2') plot(vec.contig, p.max=0.01, col='blue', cex=0.5) contig.nmds1<-metaMDS(contig.exp1tra, distance='bray', k=2, trymax=100) vec1.contig<-envfit(contig.nmds1$points, contig.exp1tra, perm=100) ordiplot(contig.nmds1, choices=c(1,2), type='text', display='sites', xlab='Axis 1', ylab='Axis 2') plot(vec1.contig, p.max=0.01, col='blue', cex=0.5) treatment<-as.factor(c('high', 'high', 'high', 'high', 'ambient', 'ambient', 'ambient', 'ambient')) ordihull(contig.nmds1, treatment, label=T) #ANOSIM #normalize dataset by row contig.row<-data.stand(contig.exp, method='total', margin='row', plot=F) #create bray-curtis dissimilarity matrix contig.d<-vegdist(contig.row, 'bray') y.anosim<-anosim(contig.d, grouping=treatment) summary(y.anosim) #R=-0.08333, p=0.734 contig1.row<-data.stand(contig.exp1, method='total', margin='row', plot=F) #create bray-curtis dissimilarity matrix contig.d1<-vegdist(contig1.row, 'bray') y.anosim1<-anosim(contig.d1, grouping=treatment) summary(y.anosim1) #R=-0.08333, p=0.709